#!/usr/bin/python
# -*- coding: utf-8 -*-

import re
import sys
import socket
import urllib

# Global socket timeout so a single stalled connection cannot hang the crawl.
socket.setdefaulttimeout(15)

# Number of consecutive page ids stored in one output file (one page per line).
PAGES_PER_FILE = 1000
URL_TEMPLATE = 'http://baike.baidu.com/view/%d.htm'


def _fetch_page(page_id):
    """Download the page with the given id and return its raw body."""
    response = urllib.urlopen(URL_TEMPLATE % page_id)
    try:
        return response.read()
    finally:
        # Release the connection even when read() fails or times out.
        response.close()


def _chunk_index(page_id):
    """Return the number of the output file that holds ``page_id``.

    File k holds ids 1000*k + 1 .. 1000*k + 1000, so id 1000 belongs to
    file 0 and id 1001 starts file 1.
    """
    return (page_id - 1) // PAGES_PER_FILE


def crawl(first, last, fetch=None, out_dir='pages'):
    """Fetch pages ``first``..``last`` (inclusive) into chunk files.

    Each page becomes exactly one line of ``<out_dir>/<chunk index>``:
    newlines inside the page body are replaced by spaces, and a page that
    could not be fetched is recorded as an empty line so that line positions
    stay aligned with page ids.

    Args:
        first: first page id to fetch.
        last: last page id to fetch; nothing happens when ``last < first``.
        fetch: callable taking a page id and returning the page body as a
            string; defaults to downloading from Baidu Baike.
        out_dir: existing directory that receives the chunk files.

    Raises:
        IOError/OSError: when a chunk file cannot be opened or written.
            Fetch failures are reported on stdout and do not abort the crawl.
    """
    if fetch is None:
        fetch = _fetch_page

    out = None
    try:
        for page_id in range(first, last + 1):
            starts_chunk = (page_id - 1) % PAGES_PER_FILE == 0
            if out is None or starts_chunk:
                if out is not None:
                    out.close()
                # A chunk entered at its first id is (re)created; a crawl
                # that starts in the middle of a chunk appends instead, so
                # resuming does not wipe pages written by an earlier run.
                mode = 'w' if starts_chunk else 'a'
                out = open('%s/%d' % (out_dir, _chunk_index(page_id)), mode)

            try:
                data = fetch(page_id)
            except Exception as exc:
                # Best-effort crawl: report the failure and keep going.
                print('%s: %s %d' % (type(exc).__name__, exc, page_id))
                data = ''

            out.write(data.replace('\n', ' ') + '\n')
    finally:
        if out is not None:
            out.close()


def main(argv=None):
    """Command-line entry point: ``script FIRST_ID LAST_ID``."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit('usage: %s FIRST_ID LAST_ID' % (argv[0] if argv else 'crawl'))
    crawl(int(argv[1]), int(argv[2]))


if __name__ == '__main__':
    main()
